import fileinput
import sys
from os import listdir
from os.path import isfile, join
import re
import time

# Command-line arguments: path of the sample FASTA file, then the directory
# that is later scanned for FASTA files.
smplfasta = sys.argv[1]
spfasta = sys.argv[2]

# A header line is any line that begins with '>'.
check_head = re.compile(r'\>')

# Collect the header line of every record in the sample FASTA file, in file
# order. smpcnt counts the headers found.
smplist = []
smpcnt = 0
for record in fileinput.input(smplfasta):
    if check_head.match(record) is None:
        # Not a header line; nothing to collect.
        continue
    # Store the header without surrounding whitespace / trailing newline.
    smplist.append(record.strip())
    # Prints the whole accumulated list after each header, e.g.
    # ['>lpn:lpg0012 hypothetical protein (A)|1']
    # NOTE(review): looks like leftover debug output - confirm before removing.
    print(smplist)
    smpcnt = smpcnt + 1

# Read the FASTA files: list every regular file directly inside the `spfasta`
# directory (sub-directories are skipped).
onlyfiles = [f for f in listdir(spfasta) if isfile(join(spfasta, f))]

# Maps the stripped first line of each file (presumably its FASTA header -
# confirm against the data) to the name of the file it was read from.
fastaDict = {}

for fi in onlyfiles:
    # Only the first line is used as the key, so read just that line instead
    # of scanning the whole file; the context manager closes the handle as
    # soon as the line has been read.
    with open(join(spfasta, fi)) as handle:
        first_line = handle.readline()
    if not first_line:
        # Empty file: there is no first line, so it gets no entry.
        continue
    # If several files share the same first line, the file listed last wins.
    fastaDict[first_line.strip()] = fi

# Third command-line argument: directory used as the prefix of the PSSM paths.
pssmdir = sys.argv[3]

# For each sample header, in order, take the file name recorded for it in
# fastaDict, keep the part before the first '.', and build the path of the
# same-named '.pssm' file under pssmdir. A header that has no entry in
# fastaDict raises KeyError.
finalist = [
    pssmdir + '/' + fastaDict[header].split('.', 1)[0] + '.pssm'
    for header in smplist
]